{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# -*- coding: utf-8 -*-\n",
    "\"\"\"\n",
    "Created on Sun Jul  8 22:33:44 2018\n",
    "\n",
    "@author: hb65402\n",
    "\"\"\"\n",
    "\n",
    "# -*- coding: utf-8 -*-\n",
    "\"\"\"\n",
    "Created on Thu Jun  7 08:14:38 2018\n",
    "\n",
    "@author: hb65402\n",
    "\"\"\"\n",
    "import numpy\n",
    "from math import sqrt\n",
    "from numpy import concatenate\n",
    "from matplotlib import pyplot\n",
    "from pandas import read_csv\n",
    "from pandas import DataFrame\n",
    "from pandas import concat\n",
    "from sklearn.preprocessing import MinMaxScaler\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "from sklearn.metrics import mean_squared_error\n",
    "from keras.models import Sequential\n",
    "from keras.layers import Dense\n",
    "from keras.layers import LSTM\n",
    "import matplotlib.pyplot as plt \n",
    "# convert series to supervised learning\n",
    "def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):\n",
    "\tn_vars = 1 if type(data) is list else data.shape[1]\n",
    "\tdf = DataFrame(data)\n",
    "\tcols, names = list(), list()\n",
    "\t# input sequence (t-n, ... t-1)\n",
    "\tfor i in range(n_in, 0, -1):\n",
    "\t\tcols.append(df.shift(i))\n",
    "\t\tnames += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]\n",
    "\t# forecast sequence (t, t+1, ... t+n)\n",
    "\tfor i in range(0, n_out):\n",
    "\t\tcols.append(df.shift(-i))\n",
    "\t\tif i == 0:\n",
    "\t\t\tnames += [('var%d(t)' % (j+1)) for j in range(n_vars)]\n",
    "\t\telse:\n",
    "\t\t\tnames += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]\n",
    "\t# put it all together\n",
    "\tagg = concat(cols, axis=1)\n",
    "\tagg.columns = names\n",
    "   \n",
    "\t# drop rows with NaN values\n",
    "\tif dropnan:\n",
    "\t\tagg.dropna(inplace=True)\n",
    "\treturn agg\n",
    " \n",
    "# load dataset\n",
    "dataset = read_csv('sampledata.csv', header=0, index_col=0)\n",
    "values = dataset.values\n",
    "# integer encode direction\n",
    "encoder = LabelEncoder()\n",
    "values[:,4] = encoder.fit_transform(values[:,4])\n",
    "# ensure all data is float\n",
    "values = values.astype('float32')\n",
    "# normalize features\n",
    "scaler = MinMaxScaler(feature_range=(0, 1))\n",
    "scaled = scaler.fit_transform(values)\n",
    "# specify the number of lag hours\n",
    "n_hours = 3\n",
    "n_features = 8\n",
    "# frame as supervised learning\n",
    "reframed = series_to_supervised(scaled, n_hours, 1)\n",
    "#print(reframed)\n",
    " \n",
    "# split into train and test sets\n",
    "values = reframed.values\n",
    "\n",
    "n_train_hours = 365*24\n",
    "train = values[:n_train_hours, :]\n",
    "test = values[n_train_hours:, :]\n",
    "# split into input and outputs\n",
    "n_obs = n_hours * n_features\n",
    "train_X, train_y = train[:, :n_obs], train[:, -n_features]\n",
    "test_X, test_y = test[:, :n_obs], test[:, -n_features]\n",
    "print(train_X.shape, len(train_X), train_y.shape)\n",
    "# reshape input to be 3D [samples, timesteps, features]\n",
    "train_X = train_X.reshape((train_X.shape[0], n_hours, n_features))\n",
    "test_X = test_X.reshape((test_X.shape[0], n_hours, n_features))\n",
    "print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)\n",
    "\n",
    "# design network\n",
    "model = Sequential()\n",
    "model.add(LSTM(50, input_shape=(train_X.shape[1], train_X.shape[2])))\n",
    "model.add(Dense(1))\n",
    "model.compile(loss='mae', optimizer='adam')\n",
    "# fit network\n",
    "history = model.fit(train_X, train_y, epochs=50, batch_size=72, validation_data=(test_X, test_y), verbose=2, shuffle=False)\n",
    "# plot history\n",
    "pyplot.plot(history.history['loss'], label='train')\n",
    "pyplot.plot(history.history['val_loss'], label='test')\n",
    "pyplot.legend()\n",
    "pyplot.show()\n",
    "n_f = n_features - 1\n",
    "# make a prediction\n",
    "yhat = model.predict(test_X)\n",
    "test_X = test_X.reshape((test_X.shape[0], n_hours*n_features))\n",
    "# invert scaling for forecast\n",
    "inv_yhat = concatenate((yhat, test_X[:, -n_f:]), axis=1)\n",
    "inv_yhat = scaler.inverse_transform(inv_yhat)\n",
    "inv_yhat = inv_yhat[:,0]\n",
    "# invert scaling for actual\n",
    "test_y = test_y.reshape((len(test_y), 1))\n",
    "inv_y = concatenate((test_y, test_X[:, -n_f:]), axis=1)\n",
    "inv_y = scaler.inverse_transform(inv_y)\n",
    "inv_y = inv_y[:,0]\n",
    "\n",
    "plt.figure(2)\n",
    "plt.plot(inv_y)\n",
    "plt.plot(inv_yhat)\n",
    "\n",
    "# calculate RMSE\n",
    "rmse = sqrt(mean_squared_error(inv_y, inv_yhat))\n",
    "print('Test RMSE: %.3f' % rmse)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
